data <- readRDS(file="../../data/TrainP1.RDS")
Let’s create a column to identify unique site + year of measurement (= different age of the trees).
data$site_age <- paste0(data$site,data$age)
Keeping the columns of interest
Some variables have no biological interest, we can remove them: tmx_min_1y_site and tmn_max_1y_site
Minimum precipitation in Portugal are equal to 1.2mm for three measurments. But it corresponds to precipitation in winter (in March if I well remembered). And trees are in dormancy in winter, and are not affected (or at least less) by the lack of water. So better to use other variable like pre_summer_min_site which corresponds to the precipitation during the previous summer.
data <- data[,c("site_age",grep("y_site",colnames(data),value=T),grep("summer_min",colnames(data),value=T))]
# Removing of variables with no biologcial interest
data[,c("tmx_min_1y_site","tmn_max_1y_site")] <- NULL
# Very weak precipitation in Portugal in winter. Let's remove this variable.
table(data$pre_min_1y_site,data$site_age)
##
## asturias10 asturias21 asturias37 bordeaux25 bordeaux37 caceres8 madrid13 portugal11 portugal15 portugal20 portugal27
## 1.2 0 0 0 0 0 0 0 0 2833 2078 2001
## 1.5 0 0 0 0 0 249 0 0 0 0 0
## 2.2 0 0 0 0 0 0 807 0 0 0 0
## 3.5 0 0 0 0 0 0 0 3102 0 0 0
## 11.1 0 0 2963 0 0 0 0 0 0 0 0
## 18 0 3022 0 0 0 0 0 0 0 0 0
## 21.8 2949 0 0 0 0 0 0 0 0 0 0
## 26.7 0 0 0 0 2416 0 0 0 0 0 0
## 45.9 0 0 0 2420 0 0 0 0 0 0 0
data[,c("pre_min_1y_site")] <- NULL
Keep only unique values:
data <- unique(data)
Checking NAs
sapply(data, function(x) sum(is.na(x)))
## site_age tmn_mean_1y_site tmx_mean_1y_site pre_mean_1y_site pet_mean_1y_site ppet_mean_1y_site tmn_min_1y_site
## 0 0 0 0 0 0 0
## pet_min_1y_site ppet_min_1y_site tmx_max_1y_site pre_max_1y_site pet_max_1y_site ppet_max_1y_site pre_summer_min_site
## 0 0 0 0 0 0 0
Mean-centered variables
data[,paste0(grep("_site",names(data),value = T),".sc")] <- scale(data[,grep("_site",names(data),value = T)])
cor <- cor(data[,grep("_site.sc",names(data),value = T)])
corrplot::corrplot(cor,method="number")
# matrix of the p-value of the correlation
p.mat <- corpmat(cor)
corrplot::corrplot(cor, method="color", col=col(200),
type="upper", order="hclust",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, #Text label color and rotation
# Combine with significance
p.mat = p.mat, sig.level = 0.01, insig = "blank",
# hide correlation coefficient on the principal diagonal
diag=FALSE)
pca <- prcomp(data[,grep("_site.sc",names(data),value = T)], center = TRUE,scale. = TRUE)
ggbiplot(pca,varname.size =4) + ylim(-4.5, 2.5) + xlim(-3, 3) +
theme_minimal(base_size = 25) +
theme(plot.title = element_text(size=18))
Based on the exploratory analyses, we selected 6 variables describing the climatic conditions between test test sites (3 precipitation-related variables and 3 temperature-related variables).
pre_summer_min_site (min.presummer in the manuscript): the minimum of the monthly precipitation during summer -June to September- (°C)
pre_mean_1y_site (mean.pre in the manuscript): the mean of the monthly precipitation (mm)
pre_max_1y_site (max.pre in the manuscript): the maximum of the monthly precipitation ( mm)
tmn_min_1y_site (min.tmn in the manuscript): the minimum of the monthly minimum temperatures (°C)
tmx_mean_1y_site.sc (mean.tmax in the manuscript): the mean of monthly maximum temperatures (°C)
tmx_max_1y_site (max.tmx in the manuscript): the maximum of the monthly maximum temperatures (°C)
select.var <- c("pre_summer_min_site.sc","pre_mean_1y_site.sc","tmn_min_1y_site.sc",
"tmx_max_1y_site.sc","pre_max_1y_site.sc","tmx_mean_1y_site.sc")
cor(data[,select.var])
## pre_summer_min_site.sc pre_mean_1y_site.sc tmn_min_1y_site.sc tmx_max_1y_site.sc pre_max_1y_site.sc tmx_mean_1y_site.sc
## pre_summer_min_site.sc 1.00000000 0.2602013 -0.35770738 -0.48980632 -0.09170046 -0.3607577
## pre_mean_1y_site.sc 0.26020126 1.0000000 0.18778322 -0.33506384 0.84626726 -0.7841594
## tmn_min_1y_site.sc -0.35770738 0.1877832 1.00000000 0.01391504 0.41383912 0.1177338
## tmx_max_1y_site.sc -0.48980632 -0.3350638 0.01391504 1.00000000 -0.24036904 0.6970473
## pre_max_1y_site.sc -0.09170046 0.8462673 0.41383912 -0.24036904 1.00000000 -0.6456770
## tmx_mean_1y_site.sc -0.36075766 -0.7841594 0.11773381 0.69704730 -0.64567704 1.0000000
vif(data[,select.var])
## Variables VIF
## 1 pre_summer_min_site.sc 2.643798
## 2 pre_mean_1y_site.sc 10.102568
## 3 tmn_min_1y_site.sc 2.715171
## 4 tmx_max_1y_site.sc 4.833535
## 5 pre_max_1y_site.sc 6.751129
## 6 tmx_mean_1y_site.sc 11.315133
2 options:
A matrix of Euclidean distance following Thomson et al. (2018).
A variance-covariance matrix following Jarquín et al. (2014).
dat <- unique(data[,c("site_age",select.var)])
row.names(dat) <- dat$site_age
dat$site_age <- NULL
round(dat,3)
## pre_summer_min_site.sc pre_mean_1y_site.sc tmn_min_1y_site.sc tmx_max_1y_site.sc pre_max_1y_site.sc tmx_mean_1y_site.sc
## portugal11 -0.770 0.373 0.625 -0.043 0.801 -0.414
## bordeaux37 0.876 0.395 0.625 -0.878 -0.129 0.327
## asturias21 0.833 -0.326 -0.444 -1.012 -0.322 -0.777
## asturias37 -0.231 0.167 -0.750 -0.811 -0.199 -0.726
## bordeaux25 2.238 0.694 -1.360 0.158 -0.066 -0.418
## portugal27 -0.465 2.218 0.625 0.258 1.860 -1.489
## portugal15 -0.770 -0.282 0.625 -0.043 0.801 0.337
## asturias10 0.528 -0.552 0.167 -1.245 -0.782 -0.349
## portugal20 -0.465 0.061 0.625 0.258 0.801 0.222
## madrid13 -0.862 -1.422 -1.971 1.361 -1.473 1.138
## caceres8 -0.912 -1.325 1.235 1.996 -1.293 2.149
Following Thomson et al. (2018).
I did not select this option in the manuscript as I did not know how to include a euclidean matrix in brms.
euclimat <-as.matrix(dist(dat, method="euclidean", diag=TRUE,upper=TRUE))
round(euclimat,3)
## portugal11 bordeaux37 asturias21 asturias37 bordeaux25 portugal27 portugal15 asturias10 portugal20 madrid13 caceres8
## portugal11 0.000 2.196 2.556 1.977 3.726 2.421 0.996 2.590 0.827 4.418 4.288
## bordeaux37 2.196 0.000 1.715 2.070 2.742 3.697 2.175 1.499 2.019 4.532 4.412
## asturias21 2.556 1.715 0.000 1.236 2.327 4.023 2.675 0.984 2.617 4.127 5.042
## asturias37 1.977 2.070 1.236 0.000 2.793 3.480 2.259 1.614 2.235 3.773 4.897
## bordeaux25 3.726 2.742 2.327 2.793 0.000 4.294 3.912 3.049 3.581 4.507 5.676
## portugal27 2.421 3.697 4.023 3.480 4.294 0.000 3.300 4.406 2.949 6.275 6.271
## portugal15 0.996 2.175 2.675 2.259 3.912 3.300 0.000 2.528 0.561 3.978 3.648
## asturias10 2.590 1.499 0.984 1.614 3.049 4.406 2.528 0.000 2.582 4.092 4.563
## portugal20 0.827 2.019 2.617 2.235 3.581 2.949 0.561 2.582 0.000 4.040 3.690
## madrid13 4.418 4.532 4.127 3.773 4.507 6.275 3.978 4.092 4.040 0.000 3.428
## caceres8 4.288 4.412 5.042 4.897 5.676 6.271 3.648 4.563 3.690 3.428 0.000
# then scales so that the values are between 0 and 1
euclimat <- 1- euclimat/max(euclimat, na.rm=TRUE)
round(euclimat,3)
## portugal11 bordeaux37 asturias21 asturias37 bordeaux25 portugal27 portugal15 asturias10 portugal20 madrid13 caceres8
## portugal11 1.000 0.650 0.593 0.685 0.406 0.614 0.841 0.587 0.868 0.296 0.317
## bordeaux37 0.650 1.000 0.727 0.670 0.563 0.411 0.653 0.761 0.678 0.278 0.297
## asturias21 0.593 0.727 1.000 0.803 0.629 0.359 0.574 0.843 0.583 0.342 0.196
## asturias37 0.685 0.670 0.803 1.000 0.555 0.445 0.640 0.743 0.644 0.399 0.220
## bordeaux25 0.406 0.563 0.629 0.555 1.000 0.316 0.376 0.514 0.429 0.282 0.095
## portugal27 0.614 0.411 0.359 0.445 0.316 1.000 0.474 0.298 0.530 0.000 0.001
## portugal15 0.841 0.653 0.574 0.640 0.376 0.474 1.000 0.597 0.911 0.366 0.419
## asturias10 0.587 0.761 0.843 0.743 0.514 0.298 0.597 1.000 0.589 0.348 0.273
## portugal20 0.868 0.678 0.583 0.644 0.429 0.530 0.911 0.589 1.000 0.356 0.412
## madrid13 0.296 0.278 0.342 0.399 0.282 0.000 0.366 0.348 0.356 1.000 0.454
## caceres8 0.317 0.297 0.196 0.220 0.095 0.001 0.419 0.273 0.412 0.454 1.000
# saveRDS(euclimat, file = "../../data/EnvMat/EucliMatrixSites.rds")
Following Jarquín et al. (2014).
This is the option used in the mannuscript.
varmat <-as.matrix(var(t(dat)))
round(varmat,3)
## portugal11 bordeaux37 asturias21 asturias37 bordeaux25 portugal27 portugal15 asturias10 portugal20 madrid13 caceres8
## portugal11 0.377 -0.091 -0.148 0.038 -0.464 0.677 0.264 -0.145 0.233 -0.500 -0.241
## bordeaux37 -0.091 0.392 0.283 0.082 0.173 -0.141 -0.097 0.376 -0.111 -0.505 -0.362
## asturias21 -0.148 0.283 0.406 0.133 0.571 -0.013 -0.215 0.313 -0.173 -0.454 -0.704
## asturias37 0.038 0.082 0.133 0.157 0.267 0.358 -0.085 0.026 -0.048 -0.314 -0.615
## bordeaux25 -0.464 0.173 0.571 0.267 1.458 -0.087 -0.622 0.207 -0.453 0.054 -1.105
## portugal27 0.677 -0.141 -0.013 0.358 -0.087 1.952 0.153 -0.301 0.243 -1.290 -1.552
## portugal15 0.264 -0.097 -0.215 -0.085 -0.622 0.153 0.349 -0.118 0.254 -0.132 0.249
## asturias10 -0.145 0.376 0.313 0.026 0.207 -0.301 -0.118 0.413 -0.128 -0.428 -0.216
## portugal20 0.233 -0.111 -0.173 -0.048 -0.453 0.243 0.254 -0.128 0.198 -0.137 0.121
## madrid13 -0.500 -0.505 -0.454 -0.314 0.054 -1.290 -0.132 -0.428 -0.137 2.046 1.660
## caceres8 -0.241 -0.362 -0.704 -0.615 -1.105 -1.552 0.249 -0.216 0.121 1.660 2.764
heatmap(varmat)
heatmap.2(as.matrix(varmat),scale="none",
col=brewer.pal(11,"RdBu"),
trace="none",key=F,
cexRow=1.5,cexCol=1.5,
margins=c(12,8))
## png
## 2
is.symmetric.matrix(varmat)
## [1] TRUE
is.positive.definite(varmat)
## [1] FALSE
varmat.pd <- nearPD(varmat)$mat
all.equal(as.matrix(varmat.pd),varmat)
## [1] "Mean relative difference: 6.132157e-08"
varmat.pd
## 11 x 11 Matrix of class "dpoMatrix"
## portugal11 bordeaux37 asturias21 asturias37 bordeaux25 portugal27 portugal15 asturias10 portugal20 madrid13 caceres8
## portugal11 0.37658364 -0.09087853 -0.14778865 0.03818940 -0.46399677 0.67685666 0.26374250 -0.14499814 0.23340816 -0.49965785 -0.2414604
## bordeaux37 -0.09087853 0.39232737 0.28277970 0.08248928 0.17315267 -0.14071847 -0.09744160 0.37626338 -0.11098329 -0.50535525 -0.3616352
## asturias21 -0.14778865 0.28277970 0.40610737 0.13291648 0.57101630 -0.01306592 -0.21515485 0.31334658 -0.17274620 -0.45372608 -0.7036847
## asturias37 0.03818940 0.08248928 0.13291648 0.15692458 0.26735438 0.35788464 -0.08464053 0.02610159 -0.04843884 -0.31413317 -0.6146478
## bordeaux25 -0.46399677 0.17315267 0.57101630 0.26735438 1.45764631 -0.08693648 -0.62168885 0.20736731 -0.45296002 0.05401691 -1.1049717
## portugal27 0.67685666 -0.14071847 -0.01306592 0.35788464 -0.08693648 1.95226132 0.15312127 -0.30085527 0.24322264 -1.28966724 -1.5521032
## portugal15 0.26374250 -0.09744160 -0.21515485 -0.08464053 -0.62168885 0.15312127 0.34914900 -0.11780878 0.25390443 -0.13217700 0.2489944
## asturias10 -0.14499814 0.37626338 0.31334658 0.02610159 0.20736731 -0.30085527 -0.11780878 0.41289755 -0.12830705 -0.42815314 -0.2158540
## portugal20 0.23340816 -0.11098329 -0.17274620 -0.04843884 -0.45296002 0.24322264 0.25390443 -0.12830705 0.19839326 -0.13698670 0.1214936
## madrid13 -0.49965785 -0.50535525 -0.45372608 -0.31413317 0.05401691 -1.28966724 -0.13217700 -0.42815314 -0.13698670 2.04599152 1.6598482
## caceres8 -0.24146040 -0.36163522 -0.70368468 -0.61464778 -1.10497168 -1.55210319 0.24899444 -0.21585399 0.12149365 1.65984823 2.7640208
# saveRDS(varmat.pd, file = "../../data/cleaned-data/EnvMat/VarCovMatSites.rds")
data <- readRDS(file="../../data/TrainP1.RDS")
Keeping the columns of interest
data <- data[,c("prov",grep("_prov",colnames(data),value=T))]
data <- data[,-(grep("tude",colnames(data)))]
# Removing soil variables
data <- data[,-(grep("_sub_",colnames(data)))]
data <- data[,-(grep("_top_",colnames(data)))]
data <- data[,-(grep("_roots_",colnames(data)))]
Keep only unique values:
data <- unique(data)
Checking NAs
sapply(data, function(x) sum(is.na(x)))
## prov bio1_prov bio2_prov bio5_prov bio6_prov bio12_prov bio13_prov bio14_prov tmean.djf_prov tmean.mam_prov
## 0 0 0 0 0 0 0 0 0 0
## tmean.jja_prov tmean.son_prov prec.djf_prov prec.mam_prov prec.jja_prov prec.son_prov pet.mean_prov pet.min_prov pet.max_prov ppet.mean_prov
## 0 0 0 0 0 0 0 0 0 0
## ppet.min_prov ppet.max_prov
## 0 0
Mean-centered variables
data[,paste0(grep("_prov",names(data),value = T),".sc")] <- scale(data[,grep("_prov",names(data),value = T)])
cor <- cor(data[,grep("_prov.sc",names(data),value = T)])
corrplot::corrplot(cor,method="number")
# matrix of the p-value of the correlation
p.mat <- corpmat(cor)
corrplot::corrplot(cor, method="color", col=col(200),
type="upper", order="hclust",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, #Text label color and rotation
# Combine with significance
p.mat = p.mat, sig.level = 0.01, insig = "blank",
# hide correlation coefficient on the principal diagonal
diag=FALSE)
pca <- prcomp(data[,grep("_prov.sc",names(data),value = T)], center = TRUE,scale. = TRUE)
ggbiplot(pca,varname.size =4) + ylim(-4.5, 2.5) + xlim(-3, 3) +
theme_minimal(base_size = 25) +
theme(plot.title = element_text(size=18))
We didn’t keep bio13 because it was highly correlated to bio12. And we didn’t keep bio6 because it was correlated to bio1.
cat(" data:","\n","corr bio12-bio13: ",cor(data$bio12_prov.sc,data$bio13_prov.sc), "\n",
"corr bio5-bio6: ",cor(data$bio1_prov.sc,data$bio6_prov.sc),"\n","\n")
## data:
## corr bio12-bio13: 0.9629376
## corr bio5-bio6: 0.7811049
##
Selected variables:
bio1_prov.s (mean.temp in the manuscript): the average of the annual daily mean temperature (°C).
bio5_prov.sc (max.temp in the manuscript): the average of the maximum temperature of the warmest month (°C).
bio12_prov.sc (min.pre in the manuscript): the average of the precipitation of the driest month (mm).
bio14_prov.sc (mean.pre in the manuscript): the average of the annual precipitation (mm).
select.var <- paste0("bio",c(1,5,12,14),"_prov.sc")
cor(data[,select.var])
## bio1_prov.sc bio5_prov.sc bio12_prov.sc bio14_prov.sc
## bio1_prov.sc 1.000000000 -0.06119027 0.1806672 0.001350035
## bio5_prov.sc -0.061190273 1.00000000 -0.6213026 -0.725394044
## bio12_prov.sc 0.180667187 -0.62130263 1.0000000 0.645110058
## bio14_prov.sc 0.001350035 -0.72539404 0.6451101 1.000000000
vif(data[,select.var])
## Variables VIF
## 1 bio1_prov.sc 1.059054
## 2 bio5_prov.sc 2.307608
## 3 bio12_prov.sc 1.967315
## 4 bio14_prov.sc 2.474264
2 options:
A matrix of Euclidean distance following Thomson et al. (2018).
A variance-covariance matrix following Jarquín et al. (2014).
dat <- unique(data[,c("prov",select.var)])
row.names(dat) <- dat$prov
dat$prov <- NULL
round(dat,3)
## bio1_prov.sc bio5_prov.sc bio12_prov.sc bio14_prov.sc
## MIM 0.486 -0.590 1.626 2.033
## CEN -1.184 0.943 -1.111 -0.823
## ORI -0.235 1.249 -0.860 -0.942
## STJ -0.365 -1.207 0.419 1.042
## HOU 0.076 -0.596 0.786 1.561
## CUE -0.259 0.999 -1.262 -0.838
## TAM -1.149 1.614 -0.024 -1.107
## LEI 1.906 -0.899 0.154 -1.103
## VER 0.153 -0.661 0.481 1.373
## CAS 0.898 -0.706 0.354 0.325
## PIA 2.255 0.373 -0.592 -1.141
## ARM -0.638 -0.788 0.440 0.590
## PET 0.484 -0.598 1.677 2.107
## VAL -0.371 1.164 -1.234 -0.856
## SAL -1.989 0.153 -0.627 0.033
## OLO -0.301 -1.132 0.390 1.097
## CAD 0.537 -0.821 0.379 0.631
## ARN 0.868 2.004 -1.339 -1.096
## BAY -1.127 0.680 -0.899 -0.244
## SIE 0.532 -0.785 0.359 0.568
## SEG 0.627 -0.636 1.966 0.590
## PLE -1.053 -1.568 0.296 0.987
## BON -1.437 0.515 -0.925 -0.412
## COC -0.411 0.966 -1.256 -0.838
## SAC 0.159 -0.636 2.213 0.182
## QUA 1.992 1.005 -0.971 -0.886
## CAR -0.568 0.810 -1.145 -0.831
## OLB -1.000 -0.212 -0.938 -0.207
## PIE -0.959 -1.251 0.092 -0.932
## PUE 0.898 -0.711 0.288 0.455
## ALT -0.274 -0.680 0.343 0.471
## LAM 0.828 -0.822 0.317 0.700
## MAD -0.425 1.169 1.400 -1.303
## COM 1.048 1.656 -0.796 -1.186
Following Thomson et al. (2018).
I did not select this option in the manuscript as I did not know how to include a euclidean matrix in brms.
euclimat <-as.matrix(dist(dat, method="euclidean", diag=TRUE,upper=TRUE))
round(euclimat,3)[1:10,1:10]
## MIM CEN ORI STJ HOU CUE TAM LEI VER CAS
## MIM 0.000 4.559 4.351 1.882 1.047 4.434 4.484 3.756 1.364 2.172
## CEN 4.559 0.000 1.035 3.334 3.639 0.939 1.310 3.823 3.423 3.243
## ORI 4.351 1.035 0.000 3.409 3.532 0.485 1.302 3.202 3.310 2.861
## STJ 1.882 3.334 3.409 0.000 0.986 3.353 3.659 3.150 0.825 1.538
## HOU 1.047 3.639 3.532 0.986 0.000 3.551 3.763 3.307 0.372 1.550
## CUE 4.434 0.939 0.485 3.353 3.551 0.000 1.666 3.220 3.294 2.865
## TAM 4.484 1.310 1.302 3.659 3.763 1.666 0.000 3.960 3.644 3.430
## LEI 3.756 3.823 3.202 3.150 3.307 3.220 3.960 0.000 3.061 1.770
## VER 1.364 3.423 3.310 0.825 0.372 3.294 3.644 3.061 0.000 1.293
## CAS 2.172 3.243 2.861 1.538 1.550 2.865 3.430 1.770 1.293 0.000
# then scales so that the values are between 0 and 1
euclimat <- 1- euclimat/max(euclimat, na.rm=TRUE)
round(euclimat,3)[1:10,1:10]
## MIM CEN ORI STJ HOU CUE TAM LEI VER CAS
## MIM 1.000 0.110 0.151 0.633 0.796 0.135 0.125 0.267 0.734 0.576
## CEN 0.110 1.000 0.798 0.349 0.290 0.817 0.744 0.254 0.332 0.367
## ORI 0.151 0.798 1.000 0.335 0.311 0.905 0.746 0.375 0.354 0.442
## STJ 0.633 0.349 0.335 1.000 0.808 0.346 0.286 0.385 0.839 0.700
## HOU 0.796 0.290 0.311 0.808 1.000 0.307 0.266 0.355 0.927 0.698
## CUE 0.135 0.817 0.905 0.346 0.307 1.000 0.675 0.372 0.357 0.441
## TAM 0.125 0.744 0.746 0.286 0.266 0.675 1.000 0.227 0.289 0.331
## LEI 0.267 0.254 0.375 0.385 0.355 0.372 0.227 1.000 0.403 0.655
## VER 0.734 0.332 0.354 0.839 0.927 0.357 0.289 0.403 1.000 0.748
## CAS 0.576 0.367 0.442 0.700 0.698 0.441 0.331 0.655 0.748 1.000
# saveRDS(euclimat, file = "../../data/EnvMat/EucliMatrixProvenancesP1.rds")
Following Jarquín et al. (2014).
This is the option used in the mannuscript.
varmat <-as.matrix(var(t(dat)))
round(varmat,3)[1:10,1:10]
## MIM CEN ORI STJ HOU CUE TAM LEI VER CAS
## MIM 1.400 -0.893 -1.154 1.144 1.072 -1.087 -1.069 -0.195 0.947 0.438
## CEN -0.893 1.007 0.920 -0.697 -0.606 0.867 1.153 -0.779 -0.579 -0.639
## ORI -1.154 0.920 1.029 -0.929 -0.850 0.972 1.073 -0.218 -0.768 -0.511
## STJ 1.144 -0.697 -0.929 0.950 0.900 -0.851 -0.904 -0.231 0.804 0.345
## HOU 1.072 -0.606 -0.850 0.900 0.861 -0.765 -0.831 -0.316 0.771 0.292
## CUE -1.087 0.867 0.972 -0.851 -0.765 0.966 0.881 -0.214 -0.667 -0.454
## TAM -1.069 1.153 1.073 -0.904 -0.831 0.881 1.681 -0.805 -0.850 -0.798
## LEI -0.195 -0.779 -0.218 -0.231 -0.316 -0.214 -0.805 1.893 -0.192 0.676
## VER 0.947 -0.579 -0.768 0.804 0.771 -0.667 -0.850 -0.192 0.708 0.309
## CAS 0.438 -0.639 -0.511 0.345 0.292 -0.454 -0.798 0.676 0.309 0.449
heatmap(varmat)
heatmap.2(as.matrix(varmat),scale="none",
col=brewer.pal(11,"RdBu"),
trace="none",key=F,
cexRow=1.5,cexCol=1.5,
margins=c(12,8))
## png
## 2
is.symmetric.matrix(varmat)
## [1] TRUE
is.positive.definite(varmat)
## [1] FALSE
varmat.pd <- nearPD(varmat)$mat
all.equal(as.matrix(varmat.pd),varmat)
## [1] "Mean relative difference: 2.490587e-07"
# saveRDS(varmat.pd, file = "../../data/cleaned-data/EnvMat/VarCovMatProvenancesP1.rds")
data <- readRDS(file="../../data/TrainP1.RDS")
Keeping the columns of interest
data <- data[,c("prov",grep("_prov",colnames(data),value=T))]
data <- data[,-(grep("tude",colnames(data)))]
# Removing soil variables
data <- data[,-(grep("_sub_",colnames(data)))]
data <- data[,-(grep("_top_",colnames(data)))]
data <- data[,-(grep("_roots_",colnames(data)))]
Keep only unique values:
data <- unique(data)
Checking NAs
sapply(data, function(x) sum(is.na(x)))
## prov bio1_prov bio2_prov bio5_prov bio6_prov bio12_prov bio13_prov bio14_prov tmean.djf_prov tmean.mam_prov
## 0 0 0 0 0 0 0 0 0 0
## tmean.jja_prov tmean.son_prov prec.djf_prov prec.mam_prov prec.jja_prov prec.son_prov pet.mean_prov pet.min_prov pet.max_prov ppet.mean_prov
## 0 0 0 0 0 0 0 0 0 0
## ppet.min_prov ppet.max_prov
## 0 0
Mean-centered variables
data[,paste0(grep("_prov",names(data),value = T),".sc")] <- scale(data[,grep("_prov",names(data),value = T)])
cor <- cor(data[,grep("_prov.sc",names(data),value = T)])
corrplot::corrplot(cor,method="number")
# matrix of the p-value of the correlation
p.mat <- corpmat(cor)
corrplot::corrplot(cor, method="color", col=col(200),
type="upper", order="hclust",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, #Text label color and rotation
# Combine with significance
p.mat = p.mat, sig.level = 0.01, insig = "blank",
# hide correlation coefficient on the principal diagonal
diag=FALSE)
pca <- prcomp(data[,grep("_prov.sc",names(data),value = T)], center = TRUE,scale. = TRUE)
ggbiplot(pca,varname.size =4) + ylim(-4.5, 2.5) + xlim(-3, 3) +
theme_minimal(base_size = 25) +
theme(plot.title = element_text(size=18))
We didn’t keep bio13 because it was highly correlated to bio12. And we didn’t keep bio6 because it was correlated to bio1.
cat(" data:","\n","corr bio12-bio13: ",cor(data$bio12_prov.sc,data$bio13_prov.sc), "\n",
"corr bio5-bio6: ",cor(data$bio1_prov.sc,data$bio6_prov.sc),"\n","\n")
## data:
## corr bio12-bio13: 0.9629376
## corr bio5-bio6: 0.7811049
##
Selected variables:
bio1_prov.s (mean.temp in the manuscript): the average of the annual daily mean temperature (°C).
bio5_prov.sc (max.temp in the manuscript): the average of the maximum temperature of the warmest month (°C).
bio12_prov.sc (min.pre in the manuscript): the average of the precipitation of the driest month (mm).
bio14_prov.sc (mean.pre in the manuscript): the average of the annual precipitation (mm).
select.var <- paste0("bio",c(1,5,12,14),"_prov.sc")
cor(data[,select.var])
## bio1_prov.sc bio5_prov.sc bio12_prov.sc bio14_prov.sc
## bio1_prov.sc 1.000000000 -0.06119027 0.1806672 0.001350035
## bio5_prov.sc -0.061190273 1.00000000 -0.6213026 -0.725394044
## bio12_prov.sc 0.180667187 -0.62130263 1.0000000 0.645110058
## bio14_prov.sc 0.001350035 -0.72539404 0.6451101 1.000000000
vif(data[,select.var])
## Variables VIF
## 1 bio1_prov.sc 1.059054
## 2 bio5_prov.sc 2.307608
## 3 bio12_prov.sc 1.967315
## 4 bio14_prov.sc 2.474264
2 options:
A matrix of Euclidean distance following Thomson et al. (2018).
A variance-covariance matrix following Jarquín et al. (2014).
dat <- unique(data[,c("prov",select.var)])
row.names(dat) <- dat$prov
dat$prov <- NULL
round(dat,3)
## bio1_prov.sc bio5_prov.sc bio12_prov.sc bio14_prov.sc
## MIM 0.486 -0.590 1.626 2.033
## CEN -1.184 0.943 -1.111 -0.823
## ORI -0.235 1.249 -0.860 -0.942
## STJ -0.365 -1.207 0.419 1.042
## HOU 0.076 -0.596 0.786 1.561
## CUE -0.259 0.999 -1.262 -0.838
## TAM -1.149 1.614 -0.024 -1.107
## LEI 1.906 -0.899 0.154 -1.103
## VER 0.153 -0.661 0.481 1.373
## CAS 0.898 -0.706 0.354 0.325
## PIA 2.255 0.373 -0.592 -1.141
## ARM -0.638 -0.788 0.440 0.590
## PET 0.484 -0.598 1.677 2.107
## VAL -0.371 1.164 -1.234 -0.856
## SAL -1.989 0.153 -0.627 0.033
## OLO -0.301 -1.132 0.390 1.097
## CAD 0.537 -0.821 0.379 0.631
## ARN 0.868 2.004 -1.339 -1.096
## BAY -1.127 0.680 -0.899 -0.244
## SIE 0.532 -0.785 0.359 0.568
## SEG 0.627 -0.636 1.966 0.590
## PLE -1.053 -1.568 0.296 0.987
## BON -1.437 0.515 -0.925 -0.412
## COC -0.411 0.966 -1.256 -0.838
## SAC 0.159 -0.636 2.213 0.182
## QUA 1.992 1.005 -0.971 -0.886
## CAR -0.568 0.810 -1.145 -0.831
## OLB -1.000 -0.212 -0.938 -0.207
## PIE -0.959 -1.251 0.092 -0.932
## PUE 0.898 -0.711 0.288 0.455
## ALT -0.274 -0.680 0.343 0.471
## LAM 0.828 -0.822 0.317 0.700
## MAD -0.425 1.169 1.400 -1.303
## COM 1.048 1.656 -0.796 -1.186
Following Thomson et al. (2018).
I did not select this option in the manuscript as I did not know how to include a euclidean matrix in brms.
euclimat <-as.matrix(dist(dat, method="euclidean", diag=TRUE,upper=TRUE))
round(euclimat,3)[1:10,1:10]
## MIM CEN ORI STJ HOU CUE TAM LEI VER CAS
## MIM 0.000 4.559 4.351 1.882 1.047 4.434 4.484 3.756 1.364 2.172
## CEN 4.559 0.000 1.035 3.334 3.639 0.939 1.310 3.823 3.423 3.243
## ORI 4.351 1.035 0.000 3.409 3.532 0.485 1.302 3.202 3.310 2.861
## STJ 1.882 3.334 3.409 0.000 0.986 3.353 3.659 3.150 0.825 1.538
## HOU 1.047 3.639 3.532 0.986 0.000 3.551 3.763 3.307 0.372 1.550
## CUE 4.434 0.939 0.485 3.353 3.551 0.000 1.666 3.220 3.294 2.865
## TAM 4.484 1.310 1.302 3.659 3.763 1.666 0.000 3.960 3.644 3.430
## LEI 3.756 3.823 3.202 3.150 3.307 3.220 3.960 0.000 3.061 1.770
## VER 1.364 3.423 3.310 0.825 0.372 3.294 3.644 3.061 0.000 1.293
## CAS 2.172 3.243 2.861 1.538 1.550 2.865 3.430 1.770 1.293 0.000
# then scales so that the values are between 0 and 1
euclimat <- 1- euclimat/max(euclimat, na.rm=TRUE)
round(euclimat,3)[1:10,1:10]
## MIM CEN ORI STJ HOU CUE TAM LEI VER CAS
## MIM 1.000 0.110 0.151 0.633 0.796 0.135 0.125 0.267 0.734 0.576
## CEN 0.110 1.000 0.798 0.349 0.290 0.817 0.744 0.254 0.332 0.367
## ORI 0.151 0.798 1.000 0.335 0.311 0.905 0.746 0.375 0.354 0.442
## STJ 0.633 0.349 0.335 1.000 0.808 0.346 0.286 0.385 0.839 0.700
## HOU 0.796 0.290 0.311 0.808 1.000 0.307 0.266 0.355 0.927 0.698
## CUE 0.135 0.817 0.905 0.346 0.307 1.000 0.675 0.372 0.357 0.441
## TAM 0.125 0.744 0.746 0.286 0.266 0.675 1.000 0.227 0.289 0.331
## LEI 0.267 0.254 0.375 0.385 0.355 0.372 0.227 1.000 0.403 0.655
## VER 0.734 0.332 0.354 0.839 0.927 0.357 0.289 0.403 1.000 0.748
## CAS 0.576 0.367 0.442 0.700 0.698 0.441 0.331 0.655 0.748 1.000
# saveRDS(euclimat, file = "../../data/EnvMat/EucliMatrixProvenancesP2.rds")
Following Jarquín et al. (2014).
This is the option used in the mannuscript.
varmat <-as.matrix(var(t(dat)))
round(varmat,3)[1:10,1:10]
## MIM CEN ORI STJ HOU CUE TAM LEI VER CAS
## MIM 1.400 -0.893 -1.154 1.144 1.072 -1.087 -1.069 -0.195 0.947 0.438
## CEN -0.893 1.007 0.920 -0.697 -0.606 0.867 1.153 -0.779 -0.579 -0.639
## ORI -1.154 0.920 1.029 -0.929 -0.850 0.972 1.073 -0.218 -0.768 -0.511
## STJ 1.144 -0.697 -0.929 0.950 0.900 -0.851 -0.904 -0.231 0.804 0.345
## HOU 1.072 -0.606 -0.850 0.900 0.861 -0.765 -0.831 -0.316 0.771 0.292
## CUE -1.087 0.867 0.972 -0.851 -0.765 0.966 0.881 -0.214 -0.667 -0.454
## TAM -1.069 1.153 1.073 -0.904 -0.831 0.881 1.681 -0.805 -0.850 -0.798
## LEI -0.195 -0.779 -0.218 -0.231 -0.316 -0.214 -0.805 1.893 -0.192 0.676
## VER 0.947 -0.579 -0.768 0.804 0.771 -0.667 -0.850 -0.192 0.708 0.309
## CAS 0.438 -0.639 -0.511 0.345 0.292 -0.454 -0.798 0.676 0.309 0.449
heatmap(varmat)
heatmap.2(as.matrix(varmat),scale="none",
col=brewer.pal(11,"RdBu"),
trace="none",key=F,
cexRow=1.5,cexCol=1.5,
margins=c(12,8))
## png
## 2
is.symmetric.matrix(varmat)
## [1] TRUE
is.positive.definite(varmat)
## [1] FALSE
varmat.pd <- nearPD(varmat)$mat
all.equal(as.matrix(varmat.pd),varmat)
## [1] "Mean relative difference: 2.490587e-07"
# saveRDS(varmat.pd, file = "../../data/cleaned-data/EnvMat/VarCovMatProvenancesP2.rds")
Jarquín, Diego, José Crossa, Xavier Lacaze, Philippe Du Cheyron, Joëlle Daucourt, Josiane Lorgeou, François Piraux, et al. 2014. “A Reaction Norm Model for Genomic Selection Using High-Dimensional Genomic and Environmental Data.” Theoretical and Applied Genetics 127 (3): 595–607. doi:10.1007/s00122-013-2243-1.
Thomson, Caroline Elizabeth, Isabel Sophie Winney, Oceane Salles, and Benoit Pujol. 2018. “A Guide to Using a Multiple-Matrix Animal Model to Disentangle Genetic and Nongenetic Causes of Phenotypic Variance.” bioRxiv, May, 318451. doi:10.1101/318451.